/********************
* Exclusions for cohorts: NSW-ON-NY comparison
*
* Input: cohort tables
*
* Output: count of episodes/patients at each stage, cohort tables
*
*******************/

/* Add age variable to original cohort */ 
%macro age_variable (indata,date_current);
data &indata;
set &indata; 
format Birthday date9.;
Birthday = input(MthYr_birth,anydtdte7.);
age = intck('year',Birthday,&date_current);
drop Birthday;
%mend age_variable;

/* Create sa2 NSW list */
/* postcodes within NSW */ 
* convert to dataframe;
proc import out = sa2_2011 datafile = 'C:\Users\kcha0642\Documents\NSW-ON-NY\on-ny-nsw-kcha0642\on-ny-nsw\SA2_2011_AUST' 
	dbms = xlsx replace; 
	getnames = yes; 
run;  

data sa2_2011; 
	set sa2_2011; 
	keep SA2_MAINCODE_2011 SA2_NAME_2011 STATE_NAME_2011 SA2_2011_c; 
	format SA2_2011_c $9.; 
	SA2_2011_c = put(SA2_MAINCODE_2011,z9.); 
	if STATE_NAME_2011 = "New South Wales" then output; 
run;

proc sort data = sa2_2011 
	dupout = sa2_2011_2 nodupkey ;
	by SA2_2011_c ;
run; 

/********************
* ppn_ep_count
* record excluded patients and episodes at each stage
* Input: cohort tables, label
* Output: print count of episodes, ppn at stage X,
	extable = exclusions table count; 
*******************/
%macro ppn_ep_count (cohortdata,extable,stage); 	
ods output nlevels = tempcount; 
proc freq data = &cohortdata nlevels ;
	where cohort_include = 1; 
	tables ppn recnum/noprint; 
	title &stage;
run; 
proc transpose data = tempcount out=tempcount;
run; 
data tempcount; 
	set tempcount; 
	exclusions = &stage; 
run; 
proc sql;
	insert into &extable
	select exclusions, col1, col2 from tempcount;
quit; 
%mend ppn_ep_count; 

/********************
* exclusions
* Input: 
* 	indata = table after add-seq-vars
*	outdata = table to save
*	label = procedure name (for output)
*	firstdate = start date of cohort (same as add-seq-vars)
*	lastdate = date of last discharge
*	period = number of years of cohort 
* Output: final cohort, output with counts for procedures; hospital volumes
*******************/

%macro exclusions (indata, outdata, label, firstdate, lastdate, period);

%age_variable(indata = &indata, date_current = index_date);

data cohort; 
	set &indata; 
	keep recnum ppn age SA2_2011_CODE sex facility_identifier_recode2 index_date 
	start_eoc end_eoc mode_of_separation_recode  fileseq episode_of_care
	diagnosis_codeP procedure_codeP episode_start_date procedure_dateP 
	episode_start_time episode_end_date  episode_end_time facility_trans_to_recode2;
	where index_flag = 1; 
run; 

data cohort; 
	set cohort; 
	* include in final cohort; 
	cohort_include = 1; 
	* keep for later exclusion criteria; 
	inplay = 1; 
run; 

data extable; 
	length exclusions $100; 
	exclusions = '.';
	persons = 0; 
	episodes = 0; 
run; 

/* original count */
%ppn_ep_count(cohort,extable,'0. No exclusions');

/* remove female patients for prostatectomy */ 
data cohort; 
	set cohort; 
	if "&label" = "prostatectomy" then do; 
		if sex = '2' then do;
			cohort_include = 0; 
			inplay = 0; 
		end;  
	end; 
run; 

%ppn_ep_count(cohort,extable,'0. Prostatectomy only - remove female'); 

/* enrolment period */
* first procedure after firstdate; 
data cohort; 
	set cohort; 
	if index_date < &firstdate or index_date > &lastdate then cohort_include = 0; 
	* inplay = 1 as these may exclude other index procedures ; 
run; 

%ppn_ep_count(cohort,extable,"0a. Procedure before &firstdate. (&label.) and after &lastdate.")

/************************************/
/* discharged after March 31, 2018 */
/************************************/
data cohort; 
	set cohort; 
	month = put(end_eoc,yymmd5.); 
run; 
data cohort; 
	set cohort; 
	if end_eoc > &lastdate then cohort_include = 0; 
run; 

%ppn_ep_count(cohort,extable,"1. Excluded discharged after Mar 31, 2018 (&label.)");

/************************************/
/* exclude procedures that could not be linked to a specific hospital */ 
/************************************/
data cohort; 
	set cohort; 
	if missing(facility_identifier_recode2) then do;
		cohort_include = 0; 
		inplay = 0; /* if any procedures could not be linked to a hospital, then do not use them to exclude other procedures */ 
	end;  
run; 

%ppn_ep_count(cohort,extable,"2. Excluded procedures that could not be linked to a specific hospital (&label.)");

/************************************/
/* exclude LOS < 1 day */
/************************************/
data cohort; 
	set cohort; 
	los_eoc = end_eoc - start_eoc; 
run; 

data cohort; 
	set cohort;
	if los_eoc < 1 then do;
		cohort_include = 0; 
		inplay = 0; /* do not use these to exclude other procedures */ 
	end; 
run; 

%ppn_ep_count(cohort,extable,"3. Excluded LOS < 1 day (&label.)");

/************************************/
/* patients who underwent the same procedure as their index procedure during the 90 days look-back */
/************************************/
* so if patients have a previous procedure + this was in the lookback window (firstdate - 90 days to firstdate);
* create lookback table (from cohort table only) ; 
data lookbackcohort; 
	set cohort; 
	where inplay = 1; 
	lookback_procedure = 1; 
run; 

proc sql; 
	create table cohort_all as
	select c.*, l.lookback_procedure
	from cohort as c
	left join lookbackcohort as l
	on c.ppn = l.ppn and 
		c.recnum ne l.recnum and
		0 <= (c.index_date - l.index_date) <= 90 and
		l.index_date <= &firstdate; 

proc sort data = cohort_all; 
	by recnum; 
run; 

* remove duplicates (in case multiple procedures in lookback); 
data cohort_all; 
	set cohort_all; 
	by recnum; 
	retain lookback_procedure_all; 
	if first.recnum then lookback_procedure_all = 0; 
	* update to count lookback procedure - this shouldnt be necessary but just including as a fail safe;
	if lookback_procedure = 1 then lookback_procedure_all = 1; 
	if last.recnum then output; 
run; 

data cohort; 
	set cohort_all; 
	if lookback_procedure_all = 1 then do; 
		cohort_include = 0; 
	end; 
run; 

%ppn_ep_count(cohort,extable,"4. Excluded patients with same procedure in lookback period (&label.)");

/************************************/
/* procedures were underwent within 90 days after discharge from the previous procedure of the same type */
/************************************/
data lookbackcohort; 
	set cohort; 
	where inplay = 1; 
	procedure_90 = 1; 
run; 

* join to cohort table (if same procedure occurred 90 days prior); 
proc sql; 
	create table cohort_all as 
	select c.*, l.procedure_90
	from cohort as c
	left join lookbackcohort as l 
	on c.ppn = l.ppn and 
		c.recnum ne l.recnum and 
		(0 <= (c.index_date - l.end_eoc) <= 90 or
		/* some nephrectomy procedures are done twice in the episode - we should count these 
		in the 90 days as well, except the discharge will be the same */ 
		 (c.episode_of_care = l.episode_of_care and 
			l.fileseq < c.fileseq and
			0 <= (c.index_date - l.index_date) <= 90) ) ;
		 
proc sort data = cohort_all; 
	by recnum; 
run; 

* remove duplicates (in case multiple procedures in lookback); 
data cohort_all; 
	set cohort_all; 
	by recnum; 
	retain procedure_90_all; 
	if first.recnum then procedure_90_all = 0; 
	* update to count lookback procedure - this shouldnt be necessary but just including as a fail safe;
	if procedure_90 = 1 then procedure_90_all = 1; 
	if last.recnum then output; 
run; 

data cohort; 
	set cohort_all; 
	if procedure_90_all = 1 then do; 
		cohort_include = 0; 
	end;  
run; 

proc sql; 
	drop table lookbackcohort, cohort_all;
	quit; 
	
%ppn_ep_count(cohort,extable,"5. Excluded patients within 90 days after discharge from same procedure (&label.)");

/************************************/
/* missing or invalid health insurance number */ 
/************************************/
data cohort; 
	set cohort; 
	if missing(ppn) then do;
		cohort_include = 0; 
		inplay = 0; 
	end; 
run; 

%ppn_ep_count(cohort,extable,"6. Excluded episodes with missing ppn id (&label.)"); 

/************************************/
/* missing age or sex */
/************************************/
data cohort; 
	set cohort; 
	if missing(age) then cohort_include = 0; 
run; 

data cohort; 
	set cohort;
	if missing(sex) then cohort_include = 0; 
run; 

%ppn_ep_count(cohort,extable,"7. Excluded episodes with missing age or sex (&label.)");

/************************************/
/* age < 18 or > 105 years at the time of index procedure */
/************************************/
data cohort; 
	set cohort; 
	if age > 105 or age < 18 then cohort_include = 0;
run; 

%ppn_ep_count(cohort,extable,"8. Excluded patients younger than 18 or older than 105 at procedure index date (&label.)");

/************************************/
/* patients who underwent any of our 4 procedures in NSW who resided outside of NSW (based on postal code)*/
/************************************/
data cohort; 
	set cohort; 
	if missing(SA2_2011_CODE) then cohort_include = 0; 
run; 
 
%ppn_ep_count(cohort,extable,"9a. Excluded episodes with no SA2_2011 (&label.)");
	
proc sql; 
	create table nsw_check as
	select a.*,b.STATE_NAME_2011
	from cohort as a
	left join sa2_2011 as b
		on a.SA2_2011_CODE = b.SA2_2011_c; 

data cohort; 
	set nsw_check ;
	if STATE_NAME_2011 = '' then cohort_include = 0; 
run; 

proc sql; 
	drop table nsw_check;
	quit; 

%ppn_ep_count(cohort,extable,"9. Excluded patients outside of NSW - based on SA2 2011 code (&label.)");

/************************************/
/*death on or prior to the index (procedure) date*/
/************************************/
* physically impossible, but checking if there are any cases where death has occurred in
* previous episode (within the episode of care); 

* get all episodes for cohorts; 
proc sql; 
	create table temp as
	select a.ppn, a.end_eoc, a.mode_of_separation_recode
	from &indata as a 
	inner join cohort as t
	on a.ppn = t.ppn; 

* all patients that died in hospital at some point in time; 
data temp; 
	set temp; 
	format death_date date9.;
	death_date = end_eoc; 
	if mode_of_separation_recode in (6,7) then output;
run; 

proc sort data = temp noduprecs;
	by _all_; 
run; 

data temp; 
	set temp; 
	by ppn end_eoc; 
	if last.ppn then output; 
run; 
	
* join death_date to cohort table; 
proc sql; 
	create table temp2 as
	select a.*, b.death_date
	from cohort as a
	left join temp as b
	on a.ppn = b.ppn; 

data cohort; 
	set temp2; 
	if not missing(death_date) then do; 
		if death_date <= index_date then cohort_include = 0; 
	end; 
run; 

%ppn_ep_count(cohort,extable,"10. Excluded patients died on or prior to index procedure date (&label.)");

proc sql; drop table temp, temp2; quit;

/************************************/
/* exclude procedures that were performed in hospitals with mean volume < 1 procedure-per-year */
* (so 5 procedures over 5-years) over the full study period. Thus both patient volume and hospital 
* volume will be limited to procedures performed in hospitals meeting our volume threshold (> 1 procedure per-year).;
/************************************/

* create hospital table;  
proc sql; 
	create table hospital_means as
	select facility_identifier_recode2, count(distinct(recnum)) as total_count
	from cohort
	where cohort_include = 1
	group by facility_identifier_recode2; 

* join to cohorts ; 
proc sql; 
	create table temp2 as
	select a.*,b.total_count
	from cohort as a
	left join hospital_means as b
	on a.facility_identifier_recode2 = b.facility_identifier_recode2;

data cohort; 
	set temp2; 
run; 

* drop facilities with low average procedure counts; 
data cohort; 
	set cohort; 
	if total_count < &period then cohort_include = 0; 
run; 

%ppn_ep_count(cohort,extable,"11. Excluded facilities with < &period. procedures over &period. years (&label.)");

* join to full data and keep included episodes only  
data cohort; 
	set cohort; 
	where cohort_include = 1; 
run; 
proc sort data = cohort; 
	by fileseq; 
run; 
proc sql; 
	create table &outdata as
	select c.*, a.*
	from cohort as c
	left join &indata as a
	on c.recnum = a.recnum; 

proc sql;
	drop table cohort, temp1, temp2, tempcount;

* find difference between stages for exlusion table; 
data extable; 
	set extable; 
	if exclusions = '.' then delete; 
run; 
data extable; 
	set extable; 
	person_difference = lag(persons) - persons; 
	ep_difference = lag(episodes) - episodes; 
run; 

* save as excel table to cut down on copy and paste; 
proc export 
	data = extable
	dbms=xlsx
	outfile="Z:\LowValueCare\APDC\on-ny-nsw\exclusion &label."
	replace; 
run; 

%mend exclusions;
